1 Background data

Load & check boundary data for Solent area. The table shows which LAs we expect to plot.

#The LSOA boundaries for the Solent have been pre-downloaded
inf<-paste0(dataFolder, "boundaries/LSOA/lsoa_solent.shp")
message ("Loading LSOA boundaries from file")
## Loading LSOA boundaries from file
lsoa_sf <- sf::read_sf(inf)
head(lsoa_sf)
## Simple feature collection with 6 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 465991.4 ymin: 100849 xmax: 467770.5 ymax: 102360.8
## Projected CRS: OSGB 1936 / British National Grid
## # A tibble: 6 × 16
##   LSOA11CD  OBJECTID LSOA11NM    LSOA11NMW   BNG_E  BNG_N  LONG   LAT Shape__Are
##   <chr>        <int> <chr>       <chr>       <int>  <int> <dbl> <dbl>      <dbl>
## 1 E01017013    16519 Portsmouth… Portsmout… 466127 101741 -1.06  50.8    145134.
## 2 E01017014    16520 Portsmouth… Portsmout… 466924 101957 -1.05  50.8    757371.
## 3 E01017015    16521 Portsmouth… Portsmout… 467084 101499 -1.05  50.8    472647.
## 4 E01017016    16522 Portsmouth… Portsmout… 466330 101597 -1.06  50.8    122927.
## 5 E01017017    16523 Portsmouth… Portsmout… 466813 101069 -1.05  50.8    166924.
## 6 E01017018    16524 Portsmouth… Portsmout… 466350 101184 -1.06  50.8    184945.
## # … with 7 more variables: Shape__Len <dbl>, MSOA11CD <chr>, MSOA11NM <chr>,
## #   LAD11CD <chr>, LAD11NM <chr>, nLSOAs <int>, geometry <MULTIPOLYGON [m]>
table(lsoa_sf$LAD11NM) # How many LSOAs are there in each LA?
## 
## Basingstoke and Deane        East Hampshire             Eastleigh 
##                   109                    72                    77 
##               Fareham               Gosport                  Hart 
##                    73                    53                    57 
##                Havant         Isle of Wight            New Forest 
##                    78                    89                   114 
##            Portsmouth           Southampton           Test Valley 
##                   125                   148                    71 
##            Winchester 
##                    70
st_coord_sys <- st_crs(lsoa_sf)
st_coord_sys$epsg
## [1] 27700
#Trasforming the coord systen
if(st_coord_sys$epsg !=4326){
  lsoa_sf_leaflet <- st_transform(lsoa_sf, "+proj=longlat +datum=WGS84")
}

Loading deprivation data and double checking what the IMD Decile means - NB this is all LSOAs not just Solent

inFile<-paste0(dataFolder, "Indices of Multiple Deprivation/Indices_of_Multiple_Deprivation_(IMD)_2019.csv")
LSOAdep<-readr::read_csv(inFile)
## Rows: 32844 Columns: 66
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (6): lsoa11cd, lsoa11nm, lsoa11nmw, LSOA01NM, LADcd, LADnm
## dbl (60): FID, st_areasha, st_lengths, IMD_Rank, IMD_Decile, IMDScore, IMDRa...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
LSOAdep$'LSOA11CD' <- LSOAdep$'lsoa11cd'
head(LSOAdep)
## # A tibble: 6 × 67
##     FID lsoa11cd  lsoa11nm     lsoa11nmw st_areasha st_lengths IMD_Rank IMD_Decile
##   <dbl> <chr>     <chr>        <chr>          <dbl>      <dbl>    <dbl>      <dbl>
## 1     1 E01000816 Bromley 040A Bromley …    299021.      3337.    19874          7
## 2     2 E01000818 Bromley 040B Bromley …   1292309.      7408.    20548          7
## 3     3 E01000819 Bromley 040C Bromley …    397713.      4384.     9866          4
## 4     4 E01000820 Bromley 008C Bromley …    587458.      4838.    13461          5
## 5     5 E01000822 Bromley 008D Bromley …    218524.      2738.    20757          7
## 6     6 E01000823 Bromley 008E Bromley …    242692.      3195.    22675          7
## # … with 59 more variables: LSOA01NM <chr>, LADcd <chr>, LADnm <chr>,
## #   IMDScore <dbl>, IMDRank0 <dbl>, IMDDec0 <dbl>, IncScore <dbl>,
## #   IncRank <dbl>, IncDec <dbl>, EmpScore <dbl>, EmpRank <dbl>, EmpDec <dbl>,
## #   EduScore <dbl>, EduRank <dbl>, EduDec <dbl>, HDDScore <dbl>, HDDRank <dbl>,
## #   HDDDec <dbl>, CriScore <dbl>, CriRank <dbl>, CriDec <dbl>, BHSScore <dbl>,
## #   BHSRank <dbl>, BHSDec <dbl>, EnvScore <dbl>, EnvRank <dbl>, EnvDec <dbl>,
## #   IDCScore <dbl>, IDCRank <dbl>, IDCDec <dbl>, IDOScore <dbl>, …
#names(LSOAdep)

ggplot(LSOAdep, aes(x = as.factor(IMD_Decile), # forces a box plot at each value 
                   y = IMDScore, 
                   group = IMD_Decile,
                   colour = IMD_Decile)) +  
  geom_boxplot() +
  scale_color_continuous(name = "IMD Decile") + # rename the legend scale
  labs(x = "IMD Decile",
       y = "IMD Score",
       caption = "All LSOAs")

LSOAdep$la_name <- LSOAdep$LADnm
solent_dep <- getSolent(LSOAdep)
ggplot(solent_dep, aes(x = as.factor(IMD_Decile), # forces a box plot at each value 
                   y = IMDScore, 
                   group = IMD_Decile,
                   colour = IMD_Decile)) +  
  geom_boxplot() +
  scale_color_continuous(name = "IMD Decile") + # rename the legend scale
  facet_wrap(. ~ la_name)

  labs(x = "IMD Decile",
       y = "IMD Score",
       caption = "Solent LSOAs split by Local Authority")
## $x
## [1] "IMD Decile"
## 
## $y
## [1] "IMD Score"
## 
## $caption
## [1] "Solent LSOAs split by Local Authority"
## 
## attr(,"class")
## [1] "labels"

NB: IMD Decile 10 has LOWER mean IMD score (less deprived)! Which is what it says in the Excel workbook…

So remember: IMD Decile 1 = 10% most deprived; IMD Decile 10 = 10% least deprived

2 Domestic Electricity consumption

2.1 2019

Load LSOA elec data - NB this is all LSOAs not just Solent

#Electricty consumption data LSOA pre downloaded
inFile <-paste0(dataFolder, "energy/electricity/LSOA Dom Elec csv/LSOA_ELEC_2019.csv")
domelec_LSOA_2019 <- readr::read_csv(inFile)
## Rows: 41727 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Local Authority Name, Local Authority Code, Middle Layer Super Outp...
## dbl (4): Total number of domestic electricity meters, Total domestic electri...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
domelec_LSOA_2019$LSOA11CD <- domelec_LSOA_2019$`Lower Layer Super Output Area (LSOA) Code`
head(domelec_LSOA_2019)
## # A tibble: 6 × 11
##   `Local Authority Name` `Local Authority… `Middle Layer Sup… `Middle Layer Sup…
##   <chr>                  <chr>             <chr>              <chr>             
## 1 Hartlepool             E06000001         Hartlepool 001     E02002483         
## 2 Hartlepool             E06000001         Hartlepool 001     E02002483         
## 3 Hartlepool             E06000001         Hartlepool 001     E02002483         
## 4 Hartlepool             E06000001         Hartlepool 001     E02002483         
## 5 Hartlepool             E06000001         Hartlepool 001     E02002483         
## 6 Hartlepool             E06000001         Hartlepool 001     E02002483         
## # … with 7 more variables: Lower Layer Super Output Area (LSOA) Name <chr>,
## #   Lower Layer Super Output Area (LSOA) Code <chr>,
## #   Total number of domestic electricity meters <dbl>,
## #   Total domestic electricity consumption (kWh) <dbl>,
## #   Mean domestic electricity consumption 
## (kWh per meter) <dbl>,
## #   Median domestic electricity consumption 
## (kWh per meter) <dbl>,
## #   LSOA11CD <chr>
domelec_LSOA_2019$mean_de2019 <- domelec_LSOA_2019$`Mean domestic electricity consumption 
(kWh per meter)`

2.1.1 Base map

# just add the variables we want
df <- dplyr::select(domelec_LSOA_2019, LSOA11CD, mean_de2019)

lsoa_merged_sf <- merge(lsoa_sf, df) #merging the LSOA boundaries and energy data
ggplot2::ggplot(lsoa_merged_sf) + 
  geom_sf(aes(fill = mean_de2019)) +
  scale_fill_continuous(name = "Mean kWh per meter", low = "green", high = "red") +
  labs(caption = "Electricity (2019, all Solent LSOAs)")

#Mapping it

2.1.2 Map with Overlay (leaflet)

# make a new df for use in the map (prevents confusion)
map_df <- merge(lsoa_sf_leaflet, df, by = "LSOA11CD")

qpal <- colorNumeric("Reds", df$mean_de2019, n=9)

leaflet(map_df) %>%
addTiles() %>%  # Add default OpenStreetMap map tiles
  addPolygons(color = ~qpal(mean_de2019),
              fillOpacity = 0.6, weight = 1.5, popup = ~(LSOA11CD), 
             
  
               highlight = highlightOptions(
                weight = 5,
                color = "#666",
                fillOpacity = 0.7,
                bringToFront = TRUE))

2.1.3 Looking at cities in more detail

#Cities such as Southampton disappear due to their density, we will now map Southampton alone to see this area in more detail
mapData <- dplyr::filter(lsoa_merged_sf, LAD11NM == "Southampton")
# plotting the electricity consumption of Southampton in more detail
ggplot2::ggplot(mapData) + 
  geom_sf(aes(fill = mean_de2019)) +
  scale_fill_continuous(name = "Mean kWh per meter", low = "green", high = "red") +
  labs(caption = "Southampton")

#Cities such as Portsmouth disappear due to their density, we will now map Portsmouth alone to see this area in more detail
mapData <- dplyr::filter(lsoa_merged_sf, LAD11NM == "Portsmouth")
# plotting the electricity consumption of Portsmouth in more detail
ggplot2::ggplot(mapData) + 
  geom_sf(aes(fill = mean_de2019)) +
  scale_fill_continuous(name = "Mean kWh per meter", low = "green", high = "red") +
  labs(caption = "Portsmouth")

#Cities such as Winchester disappear due to their density, we will now map Winchester alone to see this area in more detail
mapData <- dplyr::filter(lsoa_merged_sf, LAD11NM == "Winchester")

# plotting the electricity consumption of Winchester in more detail
ggplot2::ggplot(mapData) + 
  geom_sf(aes(fill = mean_de2019)) +
  scale_fill_continuous(name = "Mean kWh per meter", low = "green", high = "red") +
  labs(caption = "Winchester")

2.1.4 Plotting the relationship between Electricty consumption and deprivation

To see if higher electricity use is associated with higher deprivation

domelec_LSOA_2019$la_name <-domelec_LSOA_2019$`Local Authority Name`

solent_elec <- getSolent(domelec_LSOA_2019)

df <- merge(solent_dep, solent_elec, by = "LSOA11CD")

ggplot(df, aes(x = as.factor(IMD_Decile), # forces a box plot at each value 
                   y = mean_de2019, 
                   group = IMD_Decile,
                   colour = IMD_Decile)) +  
  geom_boxplot() +
  scale_color_continuous(name = "IMD Decile") + # rename the legend scale
  labs(x = "IMD Decile",
       y = "Electricity use (mean kWh per meter)",
       caption = "Solent LSOAs, 2019")

With facets by LA

# or without colour but using facets for Local Authorities
ggplot(df, aes(x = as.factor(IMD_Decile), # forces a box plot at each value 
                   y =mean_de2019, 
                   group = IMD_Decile)) +
  geom_boxplot() +
  labs(x = "IMD Decile",
       y = "Electricity use (mean kWh per meter)",
       caption = "Solent LSOAs, 2019") +
  facet_wrap(la_name.x ~ .)

2.1.5 Plotting over 65s against dom elec consumption

inFile<-paste0(dataFolder, "census/KS102EW-age-categories.csv")
census_age<-readr::read_csv (inFile)
## Rows: 34753 Columns: 25
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): geography, geography code, Rural Urban
## dbl (22): date, Age: All usual residents; measures: Value, Age: Age 0 to 4; ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
census_age$'LSOA11CD'<- census_age$'geography code'
census_age$prop_65m <- census_age$`Over 65s`/census_age$`Age: All usual residents; measures: Value`
head(census_age)
## # A tibble: 6 × 27
##    date geography       `geography code` `Rural Urban` `Age: All usual resident…
##   <dbl> <chr>           <chr>            <chr>                             <dbl>
## 1  2011 Darlington 001B E01012334        Total                              2466
## 2  2011 Darlington 001C E01012335        Total                              1383
## 3  2011 Darlington 001D E01012366        Total                              2008
## 4  2011 Darlington 001E E01033481        Total                              1364
## 5  2011 Darlington 001F E01033482        Total                              1621
## 6  2011 Darlington 002C E01012323        Total                              1563
## # … with 22 more variables: Age: Age 0 to 4; measures: Value <dbl>,
## #   Age: Age 5 to 7; measures: Value <dbl>,
## #   Age: Age 8 to 9; measures: Value <dbl>,
## #   Age: Age 10 to 14; measures: Value <dbl>,
## #   Age: Age 15; measures: Value <dbl>,
## #   Age: Age 16 to 17; measures: Value <dbl>,
## #   Age: Age 18 to 19; measures: Value <dbl>, …
names(census_age)
##  [1] "date"                                     
##  [2] "geography"                                
##  [3] "geography code"                           
##  [4] "Rural Urban"                              
##  [5] "Age: All usual residents; measures: Value"
##  [6] "Age: Age 0 to 4; measures: Value"         
##  [7] "Age: Age 5 to 7; measures: Value"         
##  [8] "Age: Age 8 to 9; measures: Value"         
##  [9] "Age: Age 10 to 14; measures: Value"       
## [10] "Age: Age 15; measures: Value"             
## [11] "Age: Age 16 to 17; measures: Value"       
## [12] "Age: Age 18 to 19; measures: Value"       
## [13] "Age: Age 20 to 24; measures: Value"       
## [14] "Age: Age 25 to 29; measures: Value"       
## [15] "Age: Age 30 to 44; measures: Value"       
## [16] "Age: Age 45 to 59; measures: Value"       
## [17] "Age: Age 60 to 64; measures: Value"       
## [18] "Age: Age 65 to 74; measures: Value"       
## [19] "Age: Age 75 to 84; measures: Value"       
## [20] "Age: Age 85 to 89; measures: Value"       
## [21] "Age: Age 90 and over; measures: Value"    
## [22] "Over 65s"                                 
## [23] "Proportion Over 65"                       
## [24] "Age: Mean Age; measures: Value"           
## [25] "Age: Median Age; measures: Value"         
## [26] "LSOA11CD"                                 
## [27] "prop_65m"
message("Original Proportion Over 65:")
## Original Proportion Over 65:
summary(census_age$`Proportion Over 65`)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.1100  0.1600  0.1675  0.2100  0.6100
message("New prop_65m:")
## New prop_65m:
summary(census_age$prop_65m)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.1148  0.1628  0.1675  0.2128  0.6068
#BROKEN
#domelec_LSOA_2019$LSOA11CD <-domelec_LSOA_2019$`Local Authority Name`
#census_age_2019elec <- merge(domelec_LSOA_2019, census_age, by= "LSOA11CD")
#head(census_age_2019elec)
#Select varibale we want
#aggec_2019 <- dplyr::select(census_age_2019elec, LSOA11CD, "Local Authority Name", "Proportion Over 65", mean_de2019, prop_65m )
#head(aggec_2019)
domelec_LSOA_2019$LSOA11CD <- domelec_LSOA_2019$`Lower Layer Super Output Area (LSOA) Code`
Census_age_elec2019 <- merge(domelec_LSOA_2019, census_age, by = "LSOA11CD")
# select the variables we want for clarity

de_age <- dplyr::select(Census_age_elec2019, LSOA11CD, "Local Authority Name",mean_de2019, "Proportion Over 65", prop_65m)
head(de_age)
##    LSOA11CD Local Authority Name mean_de2019 Proportion Over 65   prop_65m
## 1 E01000001       City of London    4058.907               0.18 0.18293515
## 2 E01000002       City of London    3807.044               0.19 0.18732591
## 3 E01000003       City of London    2586.591               0.19 0.18870728
## 4 E01000005       City of London    2706.989               0.13 0.12893401
## 5 E01000006 Barking and Dagenham    4243.872               0.08 0.07809748
## 6 E01000007 Barking and Dagenham    2705.298               0.04 0.04169662
# table(df$`LA Name`)

de_age$la_name <- de_age$`Local Authority Name` # make a new variable with an easier name
# select the places we want here
getSolent <- function(df){
  # assumes we are filtering on la_name
  res <- dplyr::filter(df, la_name == "Basingstoke and Deane"|
                           la_name == "East Hampshire"|
                           la_name == "Eastleigh"|
                           la_name == "Fareham"|
                           la_name == "Gosport"|
                           la_name == "Hart"|
                           la_name == "Havant"|
                           la_name == "New Forest"|
                           la_name == "Portsmouth"|
                           la_name == "Rushmoor"|
                           la_name == "Southampton"|
                           la_name == "Test Valley"|
                           la_name == "Winchester"|
                           la_name == "Isle of Wight"
                           )
return(res)
}
Solent_2019 <- getSolent(de_age) # use our function to get just Solent LAs
nrow(Solent_2019)
## [1] 1194
ggplot(Solent_2019, aes(x= 100 * prop_65m, 
                   y= mean_de2019, 
                   colour = la_name)) + # use colour for each LA
  geom_point() +
  geom_smooth() + # adds a smoothed best fit line
  theme(legend.position = "None") + # otherwise we get the colour legend at the side
  labs(x = "% aged > 65, 2011 Census",
       y = "Domestic Electricity COnsumption") +
  facet_wrap(. ~ la_name) # draws a separate plot for each LA (in a different colour)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

message("Correlation coeficient across all LSOAs (2019)")
## Correlation coeficient across all LSOAs (2019)
cor(Solent_2019$prop_65m, Solent_2019$mean_de2019)
## [1] 0.1899296
# you'll need this data (on sharepoint)
oac_df <- readr::read_csv(paste0(dataFolder, "oac/lsoa-oac-data.csv"))
## New names:
## * `` -> ...10
## * `` -> ...11
## * `` -> ...12
## * `` -> ...13
## * `` -> ...14
## * ...
## Rows: 42776 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): SOA Code, SOA Name, LA Code, LA Name, Region/Country, Supergroup Na...
## dbl (1): Supergroup Code
## lgl (7): ...10, ...11, ...12, ...13, ...14, ...15, ...16
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
oac_df$la_name <- oac_df$`LA Name`

oac_df$LSOA11CD <- oac_df$`SOA Code`

solent_oac_df <- getSolent(oac_df)

df <- merge(Solent_2019, oac_df, by = "LSOA11CD")

ggplot(df, aes(x= 100 * prop_65m, 
               y= mean_de2019, 
               colour = `LA Name`)) + # use colour for each LA
  geom_point() +
  #theme(legend.position = "None") + # otherwise we get the colour legend at the side
  labs(x = "% aged > 65, 2011 Census",
       y = "Domestic Electricity COnsumption") +
  facet_wrap(. ~ `Supergroup Name`) # draws a separate plot for each LA (in a different colour)

2.1.6 Difference between 2010 and 2019 dom elec

domelec_LSOA_2010 <- readr::read_csv(paste0(dataFolder,"energy/electricity/LSOA Dom Elec csv/LSOA_ELEC_2010.csv"))
## Rows: 41730 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Local Authority Name, Local Authority Code, Middle Layer Super Outp...
## dbl (4): Total number of domestic electricity meters, Total domestic electri...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
domelec_LSOA_2010$mean_de2010 <- domelec_LSOA_2010$'Mean domestic electricity consumption \n(kWh per meter)'

# create unambiguous lsoa code for matching to each other and the sf geometry
domelec_LSOA_2010$LSOA11CD <- domelec_LSOA_2010$`Lower Layer Super Output Area (LSOA) Code`
# just the vars we want
df <- dplyr::select(domelec_LSOA_2010, LSOA11CD, mean_de2010)
# reuse the 2019 data we already analysed
merged_de <- merge(df, domelec_LSOA_2019, by="LSOA11CD", all=TRUE)
#names(merged_de)

merged_de$mean_de_diff <- as.numeric(merged_de$mean_de2019)-as.numeric(merged_de$mean_de2010) 
merged_de$mean_de_diff_pc <- 100*(merged_de$mean_de_diff/merged_de$mean_de2010)
summary(merged_de$mean_de_diff)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -5873.3  -711.7  -553.2  -582.8  -411.2  2456.0       3
summary(merged_de$mean_de_diff_pc)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -64.93  -17.30  -13.86  -14.03  -10.40   77.31       3

2.1.6.1 Basic map

# merge to sf geometry on LSOA11CD
lsoa_sf_de20102019<-merge(lsoa_sf, merged_de, by = "LSOA11CD")
ggplot2::ggplot(lsoa_sf_de20102019) +
  geom_sf(aes(fill=mean_de_diff))+
  scale_fill_continuous(name="Change in Mean consumpton (kWh per meter)",low="green",high="red") +
  labs(caption="Solent (all LSOAs)")

t_diff <-  dplyr::select(as.data.frame(lsoa_sf_de20102019), 
                         LSOA11CD,
                         mean_de2010, mean_de2019, mean_de_diff, mean_de_diff_pc)
head(arrange(t_diff, mean_de_diff_pc))
##    LSOA11CD mean_de2010 mean_de2019 mean_de_diff mean_de_diff_pc
## 1 E01023199    6265.640    3940.512    -2325.128       -37.10919
## 2 E01023201    4715.905    2992.417    -1723.488       -36.54628
## 3 E01022546    5373.513    3416.407    -1957.106       -36.42134
## 4 E01017278    3772.492    2439.883    -1332.610       -35.32438
## 5 E01023224    6119.198    3960.261    -2158.938       -35.28138
## 6 E01022653    5346.714    3549.718    -1796.996       -33.60935
head(arrange(t_diff, -mean_de_diff_pc))
##    LSOA11CD mean_de2010 mean_de2019 mean_de_diff mean_de_diff_pc
## 1 E01017032    3019.755    3296.955     277.2005        9.179569
## 2 E01032842    4437.491    4738.937     301.4457        6.793156
## 3 E01022738    2729.289    2891.538     162.2488        5.944728
## 4 E01022631    6112.007    6375.912     263.9044        4.317803
## 5 E01022489    7585.273    7845.624     260.3506        3.432317
## 6 E01022592    7172.338    7403.228     230.8898        3.219171
ggplot2::ggplot(t_diff, aes(x = mean_de_diff_pc)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

2.1.6.2 Change map with Overlay (leaflet)

# merge just the variables we want to the leaflet sf
df <- dplyr::select(merged_de, LSOA11CD, mean_de2010, mean_de2019, mean_de_diff, mean_de_diff_pc)
lsoa_sf_leaflet <- merge(lsoa_sf_leaflet, df) # add on to the 
#names(lsoa_sf_leaflet)

qpal <- colorNumeric("Reds", lsoa_sf_leaflet$mean_de_diff, n=9)

# we already checked & transformed the boundaries for leaflet so re-use
leaflet(lsoa_sf_leaflet) %>% 
  addTiles() %>%  # Add default OpenStreetMap map tiles
  addPolygons(color = ~qpal(mean_de_diff),
              fillOpacity = 0.6, weight = 1.6, popup = ~(LSOA11CD ), 
             
  
               highlight = highlightOptions(
                weight = 6,
                color = "#666",
                fillOpacity = 0.8,
                bringToFront = TRUE))

2.1.6.3 Cities in more detail

#Cities such as Southampton disappear due to their density, we will now map Southampton alone to see this area in more detail
mapData <- dplyr::filter(lsoa_sf_de20102019, LAD11NM == "Southampton")
# plotting the electricity consumption of Southampton in more detail
ggplot2::ggplot(mapData) + 
  geom_sf(aes(fill= mean_de_diff)) +
  scale_fill_continuous(name = "Change in Mean consumpton (kWh per meter)", low = "green", high = "red") +
  labs(caption = "Southampton")

#BROKEN
#Cities such as Portsmouth disappear due to their density, we will now map Portsmouth alone to see this area in more detail
mapData <- dplyr::filter(lsoa_sf_de20102019, LAD11NM == "Portsmouth")
# plotting the electricity consumption of Portsmouth in more detail
ggplot2::ggplot(mapData) + 
  geom_sf(aes(fill = mean_de_diff)) +
  scale_fill_continuous(name = "Change in Mean comsumpton (kWh per meter)", low = "green", high = "red") +
  labs(caption = "Portsmouth")

#Cities such as Winchester disappear due to their density, we will now map Winchester alone to see this area in more detail
mapData <- dplyr::filter(lsoa_sf_de20102019, LAD11NM == "Winchester")
# plotting the electricity consumption of Winchester in more detail
ggplot2::ggplot(mapData) + 
  geom_sf(aes(fill = mean_de_diff)) +
  scale_fill_continuous(name = "Change in Mean comsumpton (kWh per meter)", low = "green", high = "red") +
  labs(caption = "Winchester")

2.1.6.4 Plotting the change in dom elec consumption against the deprivation score

df <- dplyr::select(merged_de, LSOA11CD, mean_de_diff, mean_de2010, mean_de2019, mean_de_diff_pc)
LSOAdepelec <- merge(df, LSOAdep, by= "LSOA11CD")
#names(LSOAdepelec)

solent_elec <- getSolent(LSOAdepelec)
head(solent_elec)
##    LSOA11CD mean_de_diff mean_de2010 mean_de2019 mean_de_diff_pc   FID
## 1 E01017013    -488.0641    3702.356    3214.292       -13.18253 17179
## 2 E01017014    -678.0286    3991.290    3313.261       -16.98771 17180
## 3 E01017015    -514.1851    3668.720    3154.535       -14.01538 17181
## 4 E01017016    -590.5901    3909.523    3318.933       -15.10645 17182
## 5 E01017017    -429.7497    4101.945    3672.195       -10.47673 17183
## 6 E01017018    -682.7941    4079.974    3397.180       -16.73526 17184
##    lsoa11cd        lsoa11nm       lsoa11nmw st_areasha st_lengths IMD_Rank
## 1 E01017013 Portsmouth 012A Portsmouth 012A   145126.5   1856.920    12407
## 2 E01017014 Portsmouth 008A Portsmouth 008A   757386.9   5206.517    15332
## 3 E01017015 Portsmouth 014A Portsmouth 014A   472645.1   5215.706    14477
## 4 E01017016 Portsmouth 014B Portsmouth 014B   122926.9   2668.623    14960
## 5 E01017017 Portsmouth 014C Portsmouth 014C   166916.5   2596.481    21428
## 6 E01017018 Portsmouth 014D Portsmouth 014D   184942.3   2582.152    16262
##   IMD_Decile        LSOA01NM     LADcd      LADnm IMDScore IMDRank0 IMDDec0
## 1          4 Portsmouth 012A E06000044 Portsmouth   22.464    12407       4
## 2          5 Portsmouth 008A E06000044 Portsmouth   18.892    15332       5
## 3          5 Portsmouth 014A E06000044 Portsmouth   19.845    14477       5
## 4          5 Portsmouth 014B E06000044 Portsmouth   19.349    14960       5
## 5          7 Portsmouth 014C E06000044 Portsmouth   12.658    21428       7
## 6          5 Portsmouth 014D E06000044 Portsmouth   17.856    16262       5
##   IncScore IncRank IncDec EmpScore EmpRank EmpDec EduScore EduRank EduDec
## 1    0.106   15469      5    0.068   19320      6   30.333    8420      3
## 2    0.066   22396      7    0.055   23101      8   24.099   11292      4
## 3    0.094   17234      6    0.076   17293      6   18.252   14957      5
## 4    0.077   20105      7    0.052   23906      8   28.237    9297      3
## 5    0.064   22802      7    0.064   20459      7   11.811   20132      7
## 6    0.068   21959      7    0.059   21781      7   20.051   13731      5
##   HDDScore HDDRank HDDDec CriScore CriRank CriDec BHSScore BHSRank BHSDec
## 1    0.165   13549      5    0.360   11020      4   12.194   26353      9
## 2    0.064   15026      5    0.626    7377      3   14.358   23812      8
## 3    0.340   11096      4    0.656    7011      3   16.157   21470      7
## 4   -0.213   19329      6    0.327   11537      4   15.403   22475      7
## 5   -0.405   22178      7   -0.165   19129      6   13.252   25110      8
## 6   -0.235   19645      6    0.573    8092      3   10.881   27759      9
##   EnvScore EnvRank EnvDec IDCScore IDCRank IDCDec IDOScore IDORank IDODec
## 1   54.467    1248      1    0.157   13404      5    0.123   17028      6
## 2   50.479    1835      1    0.122   16881      6    0.067   25852      8
## 3   35.575    5984      2    0.133   15661      5    0.109   18806      6
## 4   57.087     950      1    0.100   19443      6    0.130   16167      5
## 5   38.706    4718      2    0.102   19243      6    0.065   26124      8
## 6   53.361    1384      1    0.094   20274      7    0.093   21058      7
##   CYPScore CYPRank CYPDec ASScore ASRank ASDec GBScore GBRank GBDec WBScore
## 1    0.937    4233      2   0.295  16557     6  -1.062  30234    10   0.655
## 2    0.529    8487      3   0.310  14844     5  -0.474  23667     8   0.507
## 3    0.115   14238      5   0.309  15009     5  -0.372  22080     7   0.746
## 4    0.827    5209      2   0.296  16424     6  -0.592  25422     8   0.939
## 5   -0.291   20831      7   0.281  18217     6  -0.501  24084     8   0.273
## 6    0.300   11444      4   0.301  15879     5  -1.532  32176    10   0.507
##   WBRank WBDec IndScore IndRank IndDec OutScore OutRank OutDec TotPop DepChi
## 1  12076     4    1.202    2441      1    1.146    3244      1   1789    463
## 2  12755     4    1.035    3608      2    1.248    2694      1   1635    328
## 3  11640     4    0.731    6461      2    0.638    7063      3   1496    291
## 4  10792     4    1.189    2521      1    1.389    2041      1   1589    341
## 5  13838     5    0.663    7167      3    1.069    3668      2   1433    227
## 6  12753     4    1.082    3248      1    1.366    2156      1   1600    290
##   Pop16_59 Pop60_ WorkPop Shape__Area Shape__Length    la_name
## 1     1106    220 1096.75    363247.0      2939.704 Portsmouth
## 2      991    316  972.75   1895866.7      8234.341 Portsmouth
## 3      872    333  860.25   1182876.9      8248.480 Portsmouth
## 4      973    275  980.25    307650.4      4223.008 Portsmouth
## 5      852    354  852.00    417678.9      4105.026 Portsmouth
## 6      930    380  925.25    462807.6      4085.935 Portsmouth
ggplot(solent_elec, aes(x = as.factor(IMD_Decile), # forces a box plot at each value 
                   y = mean_de_diff_pc, 
                   group = IMD_Decile,
                   colour = IMD_Decile)) +  
  geom_boxplot() +
  scale_color_continuous(name = "IMD Decile") + # rename the legend scale
  labs(x = "IMD Decile",
       y = "% change in mean kWh per meter",
       caption = "Electricity 2010-2019, all Solent LSOAs")

Try adding facets for each Local Authority

# or without colour but using facets for Local Authorities
ggplot(solent_elec, aes(x = as.factor(IMD_Decile), # forces a box plot at each value 
                   y =mean_de_diff_pc, 
                   group = IMD_Decile)) +
  geom_boxplot() +
  labs(x = "IMD Decile",
       y = "% change in mean kWh per meter",
       caption = "Electricity 2010-2019, all Solent LSOAs") +
  facet_wrap(la_name ~ .)

nrow(solent_elec)
## [1] 1194

Plot kWh change as a contrast to % change (which seems constant-ish across IMD)

# as a contrast to % change

ggplot(solent_elec, aes(x = mean_de2010, # forces a box plot at each value 
                   y =mean_de_diff,
                   colour = la_name)) +
  geom_point() +
  theme(legend.position="none") +
  geom_smooth() +
  labs(x = "Domestic mean kWh 2010",
       y = "Change in mean domestic kWh per meter",
       caption = "Electricity 2010-2019, all Solent LSOAs") +
  facet_wrap(la_name ~ .)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(solent_elec, aes(x = as.factor(IMD_Decile), # forces a box plot at each value 
                   y =mean_de_diff, 
                   group = IMD_Decile)) +
  geom_boxplot() +
  labs(x = "IMD Decile",
       y = "Change in mean domestic kWh per meter",
       caption = "Electricity 2010-2019, all Solent LSOAs") 

# or without colour but using facets for Local Authorities
# first with fixed scales
ggplot(solent_elec, aes(x = as.factor(IMD_Decile), # forces a box plot at each value 
                   y =mean_de_diff, 
                   group = IMD_Decile)) +
  geom_boxplot() +
  labs(x = "IMD Decile",
       y = "Change in mean domestic kWh per meter",
       caption = "Electricity 2010-2019, all Solent LSOAs") +
  facet_wrap(la_name ~ .)

Figure 2.1

# now with 'free' scales
ggplot(solent_elec, aes(x = as.factor(IMD_Decile), # forces a box plot at each value 
                   y =mean_de_diff, 
                   group = IMD_Decile)) +
  geom_boxplot() +
  labs(x = "IMD Decile",
       y = "Change in mean domestic kWh per meter",
       caption = "Electricity 2010-2019, all Solent LSOAs") +
  facet_wrap(la_name ~ . , scales = "free_y")
freeScaleElecIMD

Figure 2.1: freeScaleElecIMD

nrow(solent_elec)
## [1] 1194

So there is a slightly downward trend - those in lower deprivation deciles are reducing slightly more.

3 Mapping Gas

3.1 2019

#Electricty consumption data LSOA pre downloaded
inFile <-paste0(dataFolder, "energy/gas/LSOA Gas csv/LSOA_GAS_2019.csv")
domgas_LSOA_2019 <- readr::read_csv(inFile)
## Rows: 40213 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): Local Authority Name, Local Authority Code, MSOA Name, Middle Layer...
## dbl (4): Number of consuming meters, Consumption (kWh), Mean consumption (kW...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
domgas_LSOA_2019$LSOA11CD <- domgas_LSOA_2019$`Lower Layer Super Output Area (LSOA) Code`

domgas_LSOA_2019$mean_dg2019 <- domgas_LSOA_2019$`Mean consumption (kWh per meter)`

head(domgas_LSOA_2019)
## # A tibble: 6 × 13
##   `Local Authority… `Local Authority… `MSOA Name` `Middle Layer Sup… `LSOA Name`
##   <chr>             <chr>             <chr>       <chr>              <chr>      
## 1 Hartlepool        E06000001         Hartlepool… E02002483          Hartlepool…
## 2 Hartlepool        E06000001         Hartlepool… E02002483          Hartlepool…
## 3 Hartlepool        E06000001         Hartlepool… E02002483          Hartlepool…
## 4 Hartlepool        E06000001         Hartlepool… E02002483          Hartlepool…
## 5 Hartlepool        E06000001         Hartlepool… E02002483          Hartlepool…
## 6 Hartlepool        E06000001         Hartlepool… E02002483          Hartlepool…
## # … with 8 more variables: Lower Layer Super Output Area (LSOA) Code <chr>,
## #   Number of consuming meters <dbl>, Consumption (kWh) <dbl>,
## #   Mean consumption (kWh per meter) <dbl>,
## #   Median consumption (kWh per meter) <dbl>,
## #   Number of non-consuming meters <chr>, LSOA11CD <chr>, mean_dg2019 <dbl>

3.1.1 Basic map

df <- dplyr::select(domgas_LSOA_2019, LSOA11CD, mean_dg2019)
lsoa_merged_sf_gas <- merge(lsoa_sf, df) #merging the boundaries and energy data
ggplot2::ggplot(lsoa_merged_sf_gas) + 
  geom_sf(aes(fill = mean_dg2019)) +
  scale_fill_continuous(name = "Gas: Mean kWh per meter", low = "green", high = "red") +
  labs(caption = "Solent (all LSOAs)")

#Mapping it

Aha - this one shows where there is no gas :-)

3.1.2 Map with Overlay

# re-use the df from previous chunk
lsoa_sf_leaflet <- merge(lsoa_sf_leaflet, df, by = "LSOA11CD")
qpal <- colorNumeric("Reds", lsoa_sf_leaflet$mean_dg2019, n=9)

leaflet(lsoa_sf_leaflet) %>%
  addTiles() %>%
  addPolygons(color= ~qpal(mean_dg2019), 
              fillOpacity = 0.6, weight = 1.5, popup = ~(LSOA11CD),
              
              highlight=highlightOptions(
                weight=5,
                color="#666",
                fillOpacity = 0.7,
                bringToFront = TRUE))

3.1.3 Cities in More detail

#Cities such as Southampton disappear due to their density, we will now map Southampton alone to see this area in more detail
mapData <- dplyr::filter(lsoa_merged_sf_gas, LAD11NM == "Southampton")

# plotting the gas consumption of Southampton in more detail
ggplot2::ggplot(mapData) + 
  geom_sf(aes(fill = mean_dg2019)) +
  scale_fill_continuous(name = "Gas: Mean kWh per meter", low = "green", high = "red") +
  labs(caption = "Southampton")

#Cities such as Portsmouth disappear due to their density, we will now map Portsmouth alone to see this area in more detail
mapData <- dplyr::filter(lsoa_merged_sf_gas, LAD11NM == "Portsmouth")

# plotting the gas consumption of Portsmouth in more detail
ggplot2::ggplot(mapData) + 
  geom_sf(aes(fill = mean_dg2019)) +
  scale_fill_continuous(name = "Gas: Mean kWh per meter", low = "green", high = "red") +
  labs(caption = "Portsmouth")

#Cities such as Winchester disappear due to their density, we will now map Winchester alone to see this area in more detail
mapData <- dplyr::filter(lsoa_merged_sf_gas, LAD11NM == "Winchester")

# plotting the gas consumption of Winchester in more detail
ggplot2::ggplot(mapData) + 
  geom_sf(aes(fill = mean_dg2019)) +
  scale_fill_continuous(name = "Gas: Mean kWh per meter", low = "green", high = "red") +
  labs(caption = "Winchester")

### Plotting gas usage against proportion of over 65s

domelec_LSOA_2019$LSOA11CD <- domelec_LSOA_2019$`Lower Layer Super Output Area (LSOA) Code`
Census_age_elec2019 <- merge(domelec_LSOA_2019, census_age, by = "LSOA11CD")
# select the variables we want for clarity

de_age <- dplyr::select(Census_age_elec2019, LSOA11CD, "Local Authority Name",mean_de2019, "Proportion Over 65", prop_65m)
head(de_age)
##    LSOA11CD Local Authority Name mean_de2019 Proportion Over 65   prop_65m
## 1 E01000001       City of London    4058.907               0.18 0.18293515
## 2 E01000002       City of London    3807.044               0.19 0.18732591
## 3 E01000003       City of London    2586.591               0.19 0.18870728
## 4 E01000005       City of London    2706.989               0.13 0.12893401
## 5 E01000006 Barking and Dagenham    4243.872               0.08 0.07809748
## 6 E01000007 Barking and Dagenham    2705.298               0.04 0.04169662
# table(df$`LA Name`)

de_age$la_name <- de_age$`Local Authority Name` # make a new variable with an easier name
# select the places we want here
getSolent <- function(df){
  # assumes we are filtering on la_name
  res <- dplyr::filter(df, la_name == "Basingstoke and Deane"|
                           la_name == "East Hampshire"|
                           la_name == "Eastleigh"|
                           la_name == "Fareham"|
                           la_name == "Gosport"|
                           la_name == "Hart"|
                           la_name == "Havant"|
                           la_name == "New Forest"|
                           la_name == "Portsmouth"|
                           la_name == "Rushmoor"|
                           la_name == "Southampton"|
                           la_name == "Test Valley"|
                           la_name == "Winchester"|
                           la_name == "Isle of Wight"
                           )
return(res)
}
Solent_2019 <- getSolent(de_age) # use our function to get just Solent LAs
nrow(Solent_2019)
## [1] 1194
ggplot(Solent_2019, aes(x= 100 * prop_65m, 
                   y= mean_de2019, 
                   colour = la_name)) + # use colour for each LA
  geom_point() +
  geom_smooth() + # adds a smoothed best fit line
  theme(legend.position = "None") + # otherwise we get the colour legend at the side
  labs(x = "% aged > 65, 2011 Census",
       y = "Domestic Electricity COnsumption") +
  facet_wrap(. ~ la_name) # draws a separate plot for each LA (in a different colour)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

message("Correlation coeficient across all LSOAs (2019)")
## Correlation coeficient across all LSOAs (2019)
cor(Solent_2019$prop_65m, Solent_2019$mean_de2019)
## [1] 0.1899296

3.1.4 Plotting gas usage against deprivation

domgas_LSOA_2019$la_name <-domgas_LSOA_2019$`Local Authority Name`

solent_gas <- getSolent(domgas_LSOA_2019)

df <- merge(solent_dep, solent_gas, by = "LSOA11CD")

ggplot(df, aes(x = as.factor(IMD_Decile), # forces a box plot at each value 
                   y = mean_dg2019, 
                   group = IMD_Decile,
                   colour = IMD_Decile)) +  
  geom_boxplot() +
  scale_color_continuous(name = "IMD Decile") + # rename the legend scale
  labs(x = "IMD Decile",
       y = "Gas Consumption")

Much more of a correlation with low deprivation (higher deciles)

# or without colour but using facets for Local Authorities
ggplot(df, aes(x = as.factor(IMD_Decile), # forces a box plot at each value 
                   y =mean_dg2019, 
                   group = IMD_Decile)) +
  geom_boxplot() +
  labs(x = "IMD Decile",
       y = "Gas Consumption") +
  facet_wrap(la_name.y ~ .)

3.1.5 Difference between 2010 and 2019 dom gas

domgas_LSOA_2010 <- readr::read_csv(paste0(dataFolder, "energy/gas/LSOA Gas csv/LSOA_GAS_2010.csv"))
## Rows: 40122 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Local Authority Name, Local Authority Code, MSOA Name, Middle Layer...
## dbl (4): Number of meters, Consumption (kWh), Mean consumption (kWh per mete...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
domgas_LSOA_2010$mean_dg2010 <- domgas_LSOA_2010$'Mean consumption (kWh per meter)'
domgas_LSOA_2010$LSOA11CD <- domgas_LSOA_2010$`Lower Layer Super Output Area (LSOA) Code`

df <- dplyr::select(domgas_LSOA_2010, LSOA11CD, mean_dg2010) # just the vars we need

merged_dgas <- merge(domgas_LSOA_2019, df, by = "LSOA11CD", all = TRUE)

#names(merged_dgas)
merged_dgas$dg_diff<-as.numeric(merged_dgas$mean_dg2019) - merged_dgas$mean_dg2010
summary(merged_dgas)
##    LSOA11CD         Local Authority Name Local Authority Code
##  Length:40226       Length:40226         Length:40226        
##  Class :character   Class :character     Class :character    
##  Mode  :character   Mode  :character     Mode  :character    
##                                                              
##                                                              
##                                                              
##                                                              
##   MSOA Name         Middle Layer Super Output Area (MSOA) Code
##  Length:40226       Length:40226                              
##  Class :character   Class :character                          
##  Mode  :character   Mode  :character                          
##                                                               
##                                                               
##                                                               
##                                                               
##   LSOA Name         Lower Layer Super Output Area (LSOA) Code
##  Length:40226       Length:40226                             
##  Class :character   Class :character                         
##  Mode  :character   Mode  :character                         
##                                                              
##                                                              
##                                                              
##                                                              
##  Number of consuming meters Consumption (kWh)  
##  Min.   :    5.0            Min.   :1.860e+04  
##  1st Qu.:  492.0            1st Qu.:6.166e+06  
##  Median :  615.0            Median :8.062e+06  
##  Mean   :  599.2            Mean   :8.087e+06  
##  3rd Qu.:  705.0            3rd Qu.:9.839e+06  
##  Max.   :87991.0            Max.   :1.018e+09  
##  NA's   :13                 NA's   :13         
##  Mean consumption (kWh per meter) Median consumption (kWh per meter)
##  Min.   : 1193                    Min.   :  537.1                   
##  1st Qu.:11476                    1st Qu.:10361.9                   
##  Median :13197                    Median :12045.5                   
##  Mean   :13768                    Mean   :12531.2                   
##  3rd Qu.:15384                    3rd Qu.:14090.4                   
##  Max.   :47003                    Max.   :49456.0                   
##  NA's   :13                       NA's   :13                        
##  Number of non-consuming meters  mean_dg2019      la_name         
##  Length:40226                   Min.   : 1193   Length:40226      
##  Class :character               1st Qu.:11476   Class :character  
##  Mode  :character               Median :13197   Mode  :character  
##                                 Mean   :13768                     
##                                 3rd Qu.:15384                     
##                                 Max.   :47003                     
##                                 NA's   :13                        
##   mean_dg2010         dg_diff      
##  Min.   :  553.6   Min.   :-22666  
##  1st Qu.:12997.8   1st Qu.: -2090  
##  Median :14915.0   Median : -1601  
##  Mean   :15444.4   Mean   : -1676  
##  3rd Qu.:17241.8   3rd Qu.: -1143  
##  Max.   :52729.2   Max.   : 23242  
##  NA's   :104       NA's   :117

3.1.5.1 Plotting the change in gas usage against the deprivation

LSOAdepgas <- merge(merged_dgas, LSOAdep, by= "LSOA11CD")
#names(LSOAdepgas)
LSOAdepgas$MSOA_name <- LSOAdepgas$`MSOA Name`
df <- dplyr::select(LSOAdepgas, la_name.x,MSOA_name, LSOA11CD,  IMD_Decile, mean_dg2019, IMDScore, mean_dg2010,mean_dg2019, dg_diff 
)

head(df)
##              la_name.x                MSOA_name  LSOA11CD IMD_Decile
## 1       City of London       City of London 001 E01000001          9
## 2       City of London       City of London 001 E01000002         10
## 3       City of London       City of London 001 E01000003          5
## 4       City of London       City of London 001 E01000005          3
## 5 Barking and Dagenham Barking and Dagenham 016 E01000006          5
## 6 Barking and Dagenham Barking and Dagenham 015 E01000007          3
##   mean_dg2019 IMDScore mean_dg2010    dg_diff
## 1   11084.444    6.208   13298.600 -2214.1563
## 2   25129.528    5.143   13216.867 11912.6609
## 3    7870.607   19.402    8973.772 -1103.1645
## 4    6955.060   28.652    7836.029  -880.9685
## 5   16068.696   19.837   16774.027  -705.3315
## 6    8118.158   31.576    9469.377 -1351.2193
df$la_name <- df$la_name.x
solent_gas <- getSolent(df)
head(solent_gas)
##    la_name.x      MSOA_name  LSOA11CD IMD_Decile mean_dg2019 IMDScore
## 1 Portsmouth Portsmouth 012 E01017013          4    10222.49   22.464
## 2 Portsmouth Portsmouth 008 E01017014          5    11336.27   18.892
## 3 Portsmouth Portsmouth 014 E01017015          5    10204.14   19.845
## 4 Portsmouth Portsmouth 014 E01017016          5    10837.64   19.349
## 5 Portsmouth Portsmouth 014 E01017017          7    11435.73   12.658
## 6 Portsmouth Portsmouth 014 E01017018          5    11628.42   17.856
##   mean_dg2010   dg_diff    la_name
## 1    12059.10 -1836.611 Portsmouth
## 2    13059.83 -1723.558 Portsmouth
## 3    11795.50 -1591.360 Portsmouth
## 4    12394.83 -1557.193 Portsmouth
## 5    13123.21 -1687.480 Portsmouth
## 6    13311.45 -1683.026 Portsmouth
ggplot(solent_gas, aes(x = as.factor(IMD_Decile), # forces a box plot at each value 
                   y =100*(dg_diff/mean_dg2010), 
                   group = IMD_Decile,
                   colour = IMD_Decile)) +  
  geom_boxplot() +
  scale_color_continuous(name = "IMD Decile") + # rename the legend scale
  labs(x = "IMD Decile",
       y = "% change",
       caption = "Mean gas kWh 2010-2019, all Solent LSOas")
## Warning: Removed 5 rows containing non-finite values (stat_boxplot).

Try adding facets for each Local Authority and remove the outlier where % change > 50

message("Dropping:")
## Dropping:
filter(solent_gas,100*(dg_diff/mean_dg2010)>50)
##     la_name.x       MSOA_name  LSOA11CD IMD_Decile mean_dg2019 IMDScore
## 1 Southampton Southampton 032 E01017281          1    8429.059   62.655
##   mean_dg2010  dg_diff     la_name
## 1    4264.534 4164.525 Southampton
# or without colour but using facets for Local Authorities
solent_gasex <- filter(solent_gas,100*(dg_diff/mean_dg2010)<50)
ggplot(solent_gasex, aes(x = as.factor(IMD_Decile), # forces a box plot at each value 
                   y =100*(dg_diff/mean_dg2010), 
                   group = IMD_Decile)) +
  geom_boxplot() +
  labs(x = "IMD Decile",
       y = "% change in Gas consumption",
       caption = "Mean gas kWh 2010-2019, all Solent LSOAs with outlier removed") +
  facet_wrap(la_name ~ .)

nrow(solent_gasex)
## [1] 1160

As for electricity, plot the mean kWh difference. Keep the outlier.

# as a contrast to % change

ggplot(solent_gas, aes(x = mean_dg2010, # forces a box plot at each value 
                   y =dg_diff,
                   colour = la_name)) +
  geom_point() +
  theme(legend.position="none") +
  geom_smooth() +
  labs(x = "Domestic mean kWh 2010",
       y = "Change in mean domestic kWh per meter",
       caption = "Gas 2010-2019, all Solent LSOAs") +
  facet_wrap(la_name ~ .)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 5 rows containing missing values (geom_point).

ggplot(solent_gas, aes(x = as.factor(IMD_Decile), # forces a box plot at each value 
                   y =dg_diff, 
                   group = IMD_Decile)) +
  geom_boxplot() +
  labs(x = "IMD Decile",
       y = "Change in mean domestic kWh per meter",
       caption = "Gas 2010-2019, all Solent LSOAs") 
## Warning: Removed 5 rows containing non-finite values (stat_boxplot).

# or without colour but using facets for Local Authorities
# first with fixed scales
ggplot(solent_gas, aes(x = as.factor(IMD_Decile), # forces a box plot at each value 
                   y =dg_diff, 
                   group = IMD_Decile)) +
  geom_boxplot() +
  labs(x = "IMD Decile",
       y = "Change in mean domestic kWh per meter",
       caption = "Gas 2010-2019, all Solent LSOAs") +
  facet_wrap(la_name ~ .)
## Warning: Removed 5 rows containing non-finite values (stat_boxplot).

Figure 3.1

# now with 'free' scales to see pattern in each LA
ggplot(solent_gas, aes(x = as.factor(IMD_Decile), # forces a box plot at each value 
                   y =dg_diff, 
                   group = IMD_Decile)) +
  geom_boxplot() +
  labs(x = "IMD Decile",
       y = "Change in mean domestic kWh per meter",
       caption = "Gas 2010-2019, all Solent LSOAs") +
  facet_wrap(la_name ~ ., scales = "free_y")
## Warning: Removed 5 rows containing non-finite values (stat_boxplot).
freeScaleGasIMD

Figure 3.1: freeScaleGasIMD

nrow(solent_gas)
## [1] 1166

3.1.5.2 Basic change map

#Green indicates a decrease in gas consumption from 2010 to 2019
#Red indicates an increase in gas consumption from 2010 to 2019

df <- dplyr::select(merged_dgas, LSOA11CD, mean_dg2010, mean_dg2019, dg_diff)
lsoa_dgas20102019 <- merge(lsoa_sf, df, by = "LSOA11CD") #merging these
ggplot2::ggplot(lsoa_dgas20102019) + geom_sf(aes(fill=dg_diff))+ 
                  scale_fill_continuous(name="Change in Gas: Mean kWh per meter", low="green",high="red")+
  labs(caption ="Solent (all MSOAs)") ##BROKEN

##Need to change the varibale names to reflect what is actually there

lsoa_dgas20102019$pc_diff <- 100*(lsoa_dgas20102019$dg_diff/lsoa_dgas20102019$mean_dg2010)
t_diff <-  dplyr::select(as.data.frame(lsoa_dgas20102019), 
                         LSOA11CD, LSOA11NM,
                         mean_dg2010, mean_dg2019, dg_diff, pc_diff)
head(arrange(t_diff, pc_diff))
##    LSOA11CD         LSOA11NM mean_dg2010 mean_dg2019   dg_diff   pc_diff
## 1 E01032746 Southampton 029F    12086.10    5619.771 -6466.329 -53.50220
## 2 E01023224  Winchester 014A    14088.72    8538.244 -5550.481 -39.39662
## 3 E01032745 Southampton 029E    11305.62    6991.681 -4313.935 -38.15745
## 4 E01023155 Test Valley 003A    15019.06    9591.014 -5428.046 -36.14105
## 5 E01023199 Test Valley 013D    15604.59   10074.671 -5529.917 -35.43776
## 6 E01023225  Winchester 004A    15339.93   10214.353 -5125.581 -33.41332
head(arrange(t_diff, -pc_diff))
##    LSOA11CD         LSOA11NM mean_dg2010 mean_dg2019   dg_diff   pc_diff
## 1 E01017281 Southampton 032D    4264.534    8429.059 4164.5247 97.654849
## 2 E01032755 Southampton 029I    4501.097    5207.299  706.2018 15.689547
## 3 E01017202 Southampton 016C    8637.537    9876.979 1239.4416 14.349480
## 4 E01017140 Southampton 023D   10685.891   11672.833  986.9429  9.235944
## 5 E01017114  Portsmouth 004C    7523.706    8158.664  634.9574  8.439423
## 6 E01017207 Southampton 015A    7734.902    8152.091  417.1893  5.393595
ggplot2::ggplot(t_diff, aes(x = pc_diff)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 5 rows containing non-finite values (stat_bin).

3.1.5.3 Change map with overlay

Using leaflet

lsoa_sf_leaflet <- merge(lsoa_sf_leaflet, t_diff, by = "LSOA11CD")

qpal <- colorNumeric("Reds", lsoa_sf_leaflet$dg_diff, n=9)

leaflet(lsoa_sf_leaflet) %>%
  addTiles() %>%
  addPolygons(color = ~qpal(dg_diff),
              fillOpacity = 0.6, weight = 1.5, popup = ~(LSOA11CD), 
             
  
               highlight = highlightOptions(
                weight = 5,
                color = "#666",
                fillOpacity = 0.7,
                bringToFront = TRUE))

3.1.5.4 Cities in more detail

#Cities such as Southampton disappear due to their density, we will now map Southampton alone to see this area in more detail
mapData <- dplyr::filter(lsoa_dgas20102019, LAD11NM == "Southampton")

# plotting the gas consumption of Southampton in more detail
ggplot2::ggplot(mapData) + 
  geom_sf(aes(fill = dg_diff)) +
  scale_fill_continuous(name = "Gas: Mean kWh per meter", low = "green", high = "red") +
  labs(caption = "2010-2019, Southampton LSOAs")

#Cities such as Portsmouth disappear due to their density, we will now map Portsmouth alone to see this area in more detail
mapData <- dplyr::filter(lsoa_dgas20102019, LAD11NM == "Portsmouth")

# plotting the gas consumption of Portsmouth in more detail
ggplot2::ggplot(mapData) + 
  geom_sf(aes(fill = dg_diff)) +
  scale_fill_continuous(name = "Gas: Mean kWh per meter", low = "green", high = "red") +
  labs(caption = "2010-2019, Portsmouth LSOAs")

#Cities such as Winchester disappear due to their density, we will now map Winchester alone to see this area in more detail
mapData <- dplyr::filter(lsoa_dgas20102019, LAD11NM == "Winchester")

# plotting the gas consumption of Winchester in more detail
ggplot2::ggplot(mapData) + 
  geom_sf(aes(fill = dg_diff)) +
  scale_fill_continuous(name = "Gas: Mean kWh per meter", low = "green", high = "red") +
  labs(caption = "2010-2019, Winchester LSOAs")